In [1]:
import glob
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.express as px

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
Workflow of the project¶
  1. Concatenating multiple files into a single dataframe
  2. Feature Engineering
  3. Data Visualization using Plotly Library
  4. Forecasting the Revenue for the next year using an LSTM
Questions to answer:¶
  1. Most Quantity Ordered Products
  2. Orders Per Month
  3. Orders Per City
  4. Top 10 High Revenue Generating Products
  5. Least 5 Revenue Generating Products
  6. Revenue Generated Monthly
  7. Revenue generated City-wise
  8. Expected Approximate Revenue for the Next Year, month-wise
In [2]:
# Concatenating all monthly sales files matching "Sales*.csv" using the glob library

joined_list = glob.glob("Sales*.csv")

df = pd.concat(map(pd.read_csv, joined_list), ignore_index=True)
In [3]:
# Helper function that prints a quick overview of a dataframe:

def check_df(dataframe, head=5):
    print(" SHAPE ".center(70,'-'))
    print('Rows: {}'.format(dataframe.shape[0]))
    print('Columns: {}'.format(dataframe.shape[1]))
    print(" TYPES ".center(70,'-'))
    print(dataframe.dtypes)
    print(" HEAD ".center(70,'-'))
    print(dataframe.head(head))
    print(" TAIL ".center(70,'-'))
    print(dataframe.tail(head))
    print(" MISSING VALUES ".center(70,'-'))
    print(dataframe.isnull().sum())
    print(" DUPLICATED VALUES ".center(70,'-'))
    print(dataframe.duplicated().sum())
    print(" DESCRIBE ".center(70,'-'))
    print(dataframe.describe())

check_df(df)
------------------------------- SHAPE --------------------------------
Rows: 186850
Columns: 6
------------------------------- TYPES --------------------------------
Order ID            object
Product             object
Quantity Ordered    object
Price Each          object
Order Date          object
Purchase Address    object
dtype: object
-------------------------------- HEAD --------------------------------
  Order ID                     Product Quantity Ordered Price Each  \
0   176558        USB-C Charging Cable                2      11.95   
1      NaN                         NaN              NaN        NaN   
2   176559  Bose SoundSport Headphones                1      99.99   
3   176560                Google Phone                1        600   
4   176560            Wired Headphones                1      11.99   

       Order Date                      Purchase Address  
0  04/19/19 08:46          917 1st St, Dallas, TX 75001  
1             NaN                                   NaN  
2  04/07/19 22:30     682 Chestnut St, Boston, MA 02215  
3  04/12/19 14:38  669 Spruce St, Los Angeles, CA 90001  
4  04/12/19 14:38  669 Spruce St, Los Angeles, CA 90001  
-------------------------------- TAIL --------------------------------
       Order ID                 Product Quantity Ordered Price Each  \
186845   259353  AAA Batteries (4-pack)                3       2.99   
186846   259354                  iPhone                1        700   
186847   259355                  iPhone                1        700   
186848   259356  34in Ultrawide Monitor                1     379.99   
186849   259357    USB-C Charging Cable                1      11.95   

            Order Date                         Purchase Address  
186845  09/17/19 20:56   840 Highland St, Los Angeles, CA 90001  
186846  09/01/19 16:00  216 Dogwood St, San Francisco, CA 94016  
186847  09/23/19 07:39     220 12th St, San Francisco, CA 94016  
186848  09/19/19 17:30   511 Forest St, San Francisco, CA 94016  
186849  09/30/19 00:18   250 Meadow St, San Francisco, CA 94016  
--------------------------- MISSING VALUES ---------------------------
Order ID            545
Product             545
Quantity Ordered    545
Price Each          545
Order Date          545
Purchase Address    545
dtype: int64
------------------------- DUPLICATED VALUES --------------------------
1162
------------------------------ DESCRIBE ------------------------------
        Order ID               Product Quantity Ordered Price Each  \
count     186305                186305           186305     186305   
unique    178438                    20               10         24   
top     Order ID  USB-C Charging Cable                1      11.95   
freq         355                 21903           168552      21903   

        Order Date  Purchase Address  
count       186305            186305  
unique      142396            140788  
top     Order Date  Purchase Address  
freq           355               355  
In [4]:
# Dropping rows where every value is null

df = df.dropna(how='all')
In [5]:
# Value Counts

df['Quantity Ordered'].value_counts()
Out[5]:
Quantity Ordered
1                   168552
2                    13324
3                     2920
4                      806
Quantity Ordered       355
5                      236
6                       80
7                       24
8                        5
9                        3
Name: count, dtype: int64
In [6]:
# Value Counts

df['Price Each'].value_counts()
Out[6]:
Price Each
11.95         21903
14.95         21658
2.99          20641
3.84          20577
11.99         18882
150           15450
99.99         13325
149.99         7507
700            6804
389.99         6230
379.99         6181
600            5490
300            4780
1700           4702
999.99         4128
109.99         4101
400            2056
600.0          1347
Price Each      355
150.0            99
700.0            38
1700.0           22
300.0            20
400.0             9
Name: count, dtype: int64
In [7]:
# Dropping the repeated CSV header rows introduced by the concatenation

df.drop(df[df['Quantity Ordered'] == 'Quantity Ordered'].index, inplace = True)
df.drop(df[df['Price Each'] == 'Price Each'].index, inplace = True)
In [8]:
# Converting the features to their proper numeric dtypes

df['Quantity Ordered'] = df['Quantity Ordered'].astype(int)
df['Price Each'] = df['Price Each'].astype(float)
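As an aside, pd.to_numeric with errors='coerce' is a more defensive alternative to astype: it turns any stray non-numeric entries (like the repeated header rows dropped above) into NaN instead of raising. A minimal sketch, not part of the original run:

# Hypothetical defensive variant: coerce bad entries to NaN, then drop them
df['Quantity Ordered'] = pd.to_numeric(df['Quantity Ordered'], errors='coerce')
df['Price Each'] = pd.to_numeric(df['Price Each'], errors='coerce')
df = df.dropna(subset=['Quantity Ordered', 'Price Each'])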
In [9]:
# Converting the feature to datetime; an explicit format avoids the slow
# per-element fallback parser and its UserWarning

df['Order Date'] = pd.to_datetime(df['Order Date'], format='%m/%d/%y %H:%M')
In [10]:
# Extracting City from the Address for ease of Analysis

df['City'] = df['Purchase Address'].str.split(',', expand=True)[1].str.strip()
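One caveat: keeping only the city name conflates cities that share a name across states (e.g., Portland, OR and Portland, ME). A minimal sketch that also keeps the state code, assuming the "street, city, ST zip" address format shown above; the City_State column is hypothetical:

# Hypothetical variant: keep the two-letter state code alongside the city
address_parts = df['Purchase Address'].str.split(',', expand=True)
df['City_State'] = address_parts[1].str.strip() + ', ' + address_parts[2].str.strip().str[:2]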
In [11]:
# Creating new feature 'Revenue', which is Selling Price * Quantity

df['Revenue'] = df['Quantity Ordered'] * df['Price Each']
In [12]:
# Extracting date, month, quarter from datetime feature

df['Date'] = df['Order Date'].dt.date
df['Month'] = df['Order Date'].dt.month
df['Quarter'] = df['Order Date'].dt.quarter
In [13]:
# Mapping Month names for the feature

df['Month'] = df['Month'].map({1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'June',
                              7:'July', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'})
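One caveat for the charts below: groupby('Month') sorts these string labels alphabetically (Apr, Aug, Dec, ...), not chronologically. A minimal sketch of an ordered-categorical fix, assuming the labels used above:

# Make 'Month' an ordered categorical so groupby follows the calendar
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'June',
               'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
df['Month'] = pd.Categorical(df['Month'], categories=month_order, ordered=True)

If applied, note that the hard-coded highlight indices in the bar charts below (e.g. colors[2] for December) assume the alphabetical order and would need updating.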
In [14]:
df.head()
Out[14]:
  Order ID                     Product  Quantity Ordered  Price Each          Order Date                      Purchase Address         City  Revenue        Date Month  Quarter
0   176558        USB-C Charging Cable                 2       11.95 2019-04-19 08:46:00          917 1st St, Dallas, TX 75001       Dallas    23.90  2019-04-19   Apr        2
2   176559  Bose SoundSport Headphones                 1       99.99 2019-04-07 22:30:00     682 Chestnut St, Boston, MA 02215       Boston    99.99  2019-04-07   Apr        2
3   176560                Google Phone                 1      600.00 2019-04-12 14:38:00  669 Spruce St, Los Angeles, CA 90001  Los Angeles   600.00  2019-04-12   Apr        2
4   176560            Wired Headphones                 1       11.99 2019-04-12 14:38:00  669 Spruce St, Los Angeles, CA 90001  Los Angeles    11.99  2019-04-12   Apr        2
5   176561            Wired Headphones                 1       11.99 2019-04-30 09:27:00     333 8th St, Los Angeles, CA 90001  Los Angeles    11.99  2019-04-30   Apr        2
In [15]:
df.dtypes
Out[15]:
Order ID                    object
Product                     object
Quantity Ordered             int32
Price Each                 float64
Order Date          datetime64[ns]
Purchase Address            object
City                        object
Revenue                    float64
Date                        object
Month                       object
Quarter                      int32
dtype: object

Exploratory Data Analysis¶

Most Quantity Ordered Products¶

In [16]:
quantity_ordered = df.groupby('Product')['Quantity Ordered'].sum().sort_values(ascending=False).reset_index()

fig1 = px.bar(quantity_ordered, x='Product', y='Quantity Ordered', text = 'Quantity Ordered',
             hover_data=['Product', 'Quantity Ordered'], color='Quantity Ordered')
fig1.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig1.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig1.update_layout(title_text='Most Ordered Products')

fig1.show()

Orders Per Month¶

In [17]:
order_per_month = df.groupby('Month')['Order ID'].nunique().reset_index()

# Highlight the month with the most orders; groupby sorts the month
# names alphabetically, so index 2 corresponds to 'Dec'
colors = ['lightslategray'] * 12
colors[2] = 'crimson'

fig2 = go.Figure(data=[go.Bar(
    x= order_per_month['Month'],
    y= order_per_month['Order ID'],
    text = order_per_month['Order ID'],
    marker_color=colors
)])
fig2.update_traces(texttemplate="%{text:.2s}", textposition='outside')
fig2.update_layout(uniformtext_minsize=2, uniformtext_mode='hide')
fig2.update_layout(title_text='Highest Orders in a Month')

Orders Per City¶

In [18]:
order_per_city = df.groupby('City')['Order ID'].nunique().reset_index()

# Highlight the city with the most orders; groupby sorts cities
# alphabetically, so index 7 corresponds to San Francisco
colors = ['lightslategray'] * len(order_per_city)
colors[7] = 'crimson'

fig3 = go.Figure(data=[go.Bar(
    x= order_per_city['City'],
    y= order_per_city['Order ID'],
    text = order_per_city['Order ID'],
    marker_color=colors
)])
fig3.update_traces(texttemplate="%{text:.2s}", textposition='outside')
fig3.update_layout(uniformtext_minsize=2, uniformtext_mode='hide')
fig3.update_layout(title_text='Highest Orders Per City')

Top 10 High Revenue Generating Products¶

In [19]:
high_revenue_products = df.groupby('Product')['Revenue'].sum().sort_values(ascending=False).head(10).reset_index()

fig4 = px.line(high_revenue_products, x='Product', y='Revenue', markers=True, hover_data=['Revenue'])
fig4.update_layout(title = "Top Revenue Generated Products")
fig4.show()

Least 5 Revenue Generating Products¶

In [20]:
least_revenue_products = df.groupby('Product')['Revenue'].sum().sort_values(ascending=True).head(5).reset_index()

fig5 = px.bar(least_revenue_products, x = 'Product', y = 'Revenue')
fig5.update_layout(title="Least 5 Revenue Generated Products")
fig5.show()

Revenue Generated Monthly¶

In [21]:
monthly_revenue = df.groupby('Month')['Revenue'].sum().sort_values(ascending=False).reset_index()

labels = monthly_revenue['Month']
values = monthly_revenue['Revenue']

# Pull the first (largest) slice out of the pie; unlisted slices default to 0
fig6 = go.Figure(data=[go.Pie(labels=labels, values=values, text = monthly_revenue['Revenue'], pull=[0.2, 0, 0, 0])])
fig6.update_traces(texttemplate="%{text:.2s}", textfont_size=15, textposition='outside',
                  marker=dict(line=dict(color='#000000', width=2)))
fig6.update_layout(title_text='Revenue Generated Monthly')

Revenue generated City-wise¶

In [22]:
revenue_city = df.groupby('City')['Revenue'].sum().sort_values(ascending=False).reset_index()

fig7 = px.scatter(revenue_city, x="Revenue", y="City",
                 size="Revenue", color="City",
                 hover_name="City", log_x=False, size_max=70)
fig7.update_layout(title = 'Scatter of Revenue by City')
fig7.show()

Highest-Priced Products¶

In [23]:
product_price = df.groupby('Product')['Price Each'].max().sort_values(ascending=False).head().reset_index()
product_price
Out[23]:
              Product  Price Each
0  Macbook Pro Laptop     1700.00
1     ThinkPad Laptop      999.99
2              iPhone      700.00
3            LG Dryer      600.00
4  LG Washing Machine      600.00
In [24]:
fig8 = px.histogram(df, x = 'Revenue', y = 'Quantity Ordered', color = 'City', marginal="box",
                  hover_data = df.columns)
fig8.update_layout(title_text='Quantity Ordered across Revenue, by City')
fig8.show()

Important Takeaways from the Analysis¶

1. AAA Batteries (4-pack) is the most ordered product (31,017 units), followed by AA Batteries (27,635) and the USB-C Charging Cable (23,975)¶
2. December has the highest number of orders (24,004), followed by October (19,436)¶
3. December has the highest revenue at 4.6 million, followed by October (3.7 million) and April (3.4 million)¶
4. San Francisco has the highest number of orders (42,898)¶
5. San Francisco generated the most revenue at 8.2 million, followed by Los Angeles (5.4 million)¶
6. The highest revenue-generating products are electronics: Macbook Pro Laptop, iPhone, ThinkPad Laptop, Google Phone¶
7. The lowest revenue-generating products are the Lightning Charging Cable, USB-C Charging Cable, Wired Headphones, and AA Batteries¶

Revenue Forecasting¶

In [25]:
# Converting feature into datetime format

df['Date'] = pd.to_datetime(df['Date'])
In [26]:
# Creating a dataframe with only Date and Revenue features
# Setting the new dataframe with date index

new_df = df.groupby('Date')['Revenue'].sum().reset_index()
new_df['Date'] = pd.to_datetime(new_df['Date'])
new_df.set_index('Date', inplace =True)
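An equivalent shortcut is to resample the order timestamps directly; a minimal sketch (note that resample, unlike groupby, would also insert zero-revenue rows for any days with no orders):

# Hypothetical equivalent: aggregate revenue per calendar day via resample
daily_revenue = df.set_index('Order Date')['Revenue'].resample('D').sum()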
In [27]:
new_df
Out[27]:
Revenue
Date
2019-01-01 65681.94
2019-01-02 70813.20
2019-01-03 47046.20
2019-01-04 62012.21
2019-01-05 46524.63
... ...
2019-12-28 134015.50
2019-12-29 156024.62
2019-12-30 152319.81
2019-12-31 131454.30
2020-01-01 8670.29

366 rows × 1 columns

In [28]:
revenue_data = new_df['Revenue'].values
In [29]:
# Data scaling
scaler = MinMaxScaler()
revenue_data_scaled = scaler.fit_transform(revenue_data.reshape(-1, 1))
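A caveat worth flagging: fitting the scaler on the full series lets information from the test window leak into the training inputs. A minimal leakage-free sketch, assuming the same 30-observation test split used below:

# Hypothetical leakage-free variant: fit the scaler on the training span only
train_cutoff = len(revenue_data) - 30
scaler = MinMaxScaler().fit(revenue_data[:train_cutoff].reshape(-1, 1))
revenue_data_scaled = scaler.transform(revenue_data.reshape(-1, 1))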
In [30]:
# Prepare sequences of past time steps and corresponding target values
def prepare_sequences(data, n_steps):
    X, y = [], []
    for i in range(len(data) - n_steps):
        X.append(data[i:i + n_steps])
        y.append(data[i + n_steps])
    return np.array(X), np.array(y)
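To make the windowing concrete, a toy run (values invented for illustration):

# Toy example: n_steps=3 over six points yields three (window, target) pairs
demo = np.arange(6).reshape(-1, 1)   # [[0], [1], [2], [3], [4], [5]]
X_demo, y_demo = prepare_sequences(demo, 3)
print(X_demo.shape, y_demo.shape)    # (3, 3, 1) (3, 1)
# X_demo[0] is [[0], [1], [2]] and y_demo[0] is [3]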
In [31]:
n_steps = 30  # Number of past time steps to use for predictions
x_new, y_new = prepare_sequences(revenue_data_scaled, n_steps)
In [32]:
# Splitting the data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x_new, y_new, test_size=30, shuffle=False)

# Reshape the data to fit the LSTM model (samples, timesteps, features)
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)
In [33]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(units=150, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
In [34]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')
In [35]:
# Train the model
model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_test, y_test))
Epoch 1/100
10/10 [==============================] - 14s 354ms/step - loss: 0.0802 - val_loss: 0.1375
Epoch 2/100
10/10 [==============================] - 1s 76ms/step - loss: 0.0247 - val_loss: 0.0398
Epoch 3/100
10/10 [==============================] - 1s 75ms/step - loss: 0.0173 - val_loss: 0.1068
...
Epoch 99/100
10/10 [==============================] - 1s 77ms/step - loss: 0.0075 - val_loss: 0.0333
Epoch 100/100
10/10 [==============================] - 1s 74ms/step - loss: 0.0065 - val_loss: 0.0315
Out[35]:
<keras.src.callbacks.History at 0x2beaa010>
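The validation loss plateaus around 0.031 long before epoch 100, so an EarlyStopping callback would be a reasonable refinement; a minimal sketch, not what was run above:

# Hypothetical variant: stop once val_loss stops improving for 10 epochs
from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model.fit(x_train, y_train, epochs=100, batch_size=32,
          validation_data=(x_test, y_test), callbacks=[early_stop])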
In [36]:
# Recursive forecast: seed with the last n_steps observations, then feed each
# prediction back in as the newest time step. Note the model advances one day
# per step, so the 12 iterated values are daily-scale estimates treated below
# as rough monthly figures.
future_data_scaled = revenue_data_scaled[-n_steps:].reshape(1, n_steps, 1)
future_data = []
In [37]:
for _ in range(12):
    prediction = model.predict(future_data_scaled)
    future_data_scaled = np.append(future_data_scaled[:, 1:, :], prediction.reshape(1, 1, 1), axis=1)
    future_data.append(prediction[0, 0])
1/1 [==============================] - 2s 2s/step
...
1/1 [==============================] - 0s 39ms/step
In [38]:
# Inverse transform the scaled data to get the actual revenue values
forecasted_revenue = scaler.inverse_transform(np.array(future_data).reshape(-1, 1))
In [39]:
# Store the forecasts under a month-end datetime index (freq='M' produces
# month-ends, so the range starts at 2020-01-31)
forecast_dates = pd.date_range('2020-01-02', periods=12, freq='M')
forecast_df = pd.DataFrame(forecasted_revenue, index=forecast_dates, columns=['revenue_forecast'])
In [40]:
# Print the forecasted DataFrame
print(forecast_df)
            revenue_forecast
2020-01-31     125885.921875
2020-02-29     116821.453125
2020-03-31     108808.593750
2020-04-30     102679.335938
2020-05-31      98510.632812
2020-06-30      95947.554688
2020-07-31      94501.617188
2020-08-31      93702.320312
2020-09-30      93203.804688
2020-10-31      92770.781250
2020-11-30      92314.195312
2020-12-31      91814.781250
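To close the loop visually in the same plotly style used above, a minimal sketch (fig9 is hypothetical, not part of the original run):

# Hypothetical wrap-up plot: historical daily revenue plus the 12 forecast points
fig9 = go.Figure()
fig9.add_trace(go.Scatter(x=new_df.index, y=new_df['Revenue'],
                          mode='lines', name='2019 daily revenue'))
fig9.add_trace(go.Scatter(x=forecast_df.index, y=forecast_df['revenue_forecast'],
                          mode='lines+markers', name='forecast'))
fig9.update_layout(title_text='Daily Revenue and 12-Step Forecast')
fig9.show()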
In [ ]: